source(here::here("code", "scripts", "20200318_notebook_source.R"))
Loading required package: ape
Loading required package: adegenet
Loading required package: ade4
/// adegenet 2.1.3 is loaded ////////////
> overview: '?adegenet'
> tutorials/doc/questions: 'adegenetWeb()'
> bug reports/feature requests: adegenetIssues()
Attaching package: ‘pegas’
The following object is masked from ‘package:ade4’:
amova
The following object is masked from ‘package:ape’:
mst
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.1 ✓ purrr 0.3.4
✓ tibble 3.0.1 ✓ dplyr 1.0.0
✓ tidyr 1.1.0 ✓ stringr 1.4.0
✓ readr 1.3.1 ✓ forcats 0.5.0
── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
20200130
• 1KG final release VCFs: ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ • Educational attainment paper: https://www.nature.com/articles/s41588-018-0147-3 • Lead loci from that paper: racist_hypothesis/data/20200130_iq_gwas_topf.csv
Papers:
• Berg J. J., Coop G., 2014 ‘A population genetic signal of polygenic adaptation’. PLoS Genetics 10: e1004412 • Also their 2017 paper https://www.biorxiv.org/content/10.1101/167551v4 • Spiedel et al on a tool for inferring genealogy (and hence changes in allele frequencies, hence selection): https://www.nature.com/articles/s41588-019-0484-x
Home: /hps/research1/birney/users/ian/rac_hyp
mkcd vcfs
# download VCF and index
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz.tbi
cd /nfs/software/birney
wget https://github.com/broadinstitute/gatk/releases/download/4.1.4.1/gatk-4.1.4.1.zip
unzip gatk-4.1.4.1.zip
# amend aliases in ~/.bashrc and ~/.bash_profile
export PATH=$PATH:/nfs/software/birney/gatk-4.1.4.1/
mkcd refs
# download FASTA
wget ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.toplevel.fa.gz
# create dictionary follwoing guiadance here: <https://gatkforums.broadinstitute.org/gatk/discussion/1601/how-can-i-prepare-a-fasta-file-to-use-as-reference>
java -jar /nfs/software/birney/picard-2.9.0/picard.jar CreateSequenceDictionary \
R=refs/Homo_sapiens.GRCh38.dna.toplevel.fa.gz \
O=refs/Homo_sapiens.GRCh38.dna.toplevel.dict
# create fasta index file
/nfs/software/birney/samtools-1.9/samtools faidx refs/Homo_sapiens.GRCh38.dna.toplevel.fa.gz
# [E::fai_build3_core] Cannot index files compressed with gzip, please use bgzip
## unzip
gunzip refs/Homo_sapiens.GRCh38.dna.toplevel.fa.gz
## create dictionary again
rm refs/Homo_sapiens.GRCh38.dna.toplevel.dict
java -jar /nfs/software/birney/picard-2.9.0/picard.jar CreateSequenceDictionary \
R=refs/Homo_sapiens.GRCh38.dna.toplevel.fa \
O=refs/Homo_sapiens.GRCh38.dna.toplevel.dict
# create gast index file
/nfs/software/birney/samtools-1.9/samtools faidx refs/Homo_sapiens.GRCh38.dna.toplevel.fa
racist_hypothesis/data/20200204_snps.listcut racist_hypothesis/data/20200204_iq_gwas_topf -f5 -d"," | sed 's/"//g' | tail -n+2 > racist_hypothesis/data/20200204_snps.list
gatk SelectVariants \
-R refs/Homo_sapiens.GRCh38.dna.toplevel.fa \
-V vcfs/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz \
--keep-ids racist_hypothesis/data/20200204_snps.list \
-O vcfs/ALL.hits.vcf.gz
# Error initializing feature reader for path vcfs/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz
# Caused by: htsjdk.tribble.TribbleException$MalformedFeatureFile: Unable to parse header with error: Invalid GZIP header, for input source: vcfs/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz
This VCF doesn’t have the per-sample calls, but just the allele frequencies for each continental population.
Have to download all the VCFs on the page, then put them together.
wget -r -p -k --no-parent -cut-dirs=5 ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/
find vcfs/ftp.1000genomes.ebi.ac.uk/ALL.chr*.vcf.gz > racist_hypothesis/data/20200205_vcfs.list
java -jar /nfs/software/birney/picard-2.9.0/picard.jar MergeVcfs \
I=racist_hypothesis/data/20200205_vcfs.list \
O=vcfs/1gk_all.vcf.gz
# Exception in thread "main" java.lang.IllegalArgumentException: The contig entries in input file /hps/research1/birney/users/ian/rac_hyp/vcfs/ftp.1000genomes.ebi.ac.uk/ALL.chrMT.phase3_callmom-v0_4.20130502.genotypes.vcf.gz are not compatible with the others.
# So remove that one from list above
sed -i '/MT/d' racist_hypothesis/data/20200205_vcfs.list
# run MergeVCFs again
java -jar /nfs/software/birney/picard-2.9.0/picard.jar MergeVcfs \
I=racist_hypothesis/data/20200205_vcfs.list \
O=vcfs/1gk_all.vcf.gz
# Exception in thread "main" java.lang.IllegalArgumentException: The contig entries in input file /hps/research1/birney/users/ian/rac_hyp/vcfs/ftp.1000genomes.ebi.ac.uk/ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz are not compatible with the others.
sed -i '/chrY/d' racist_hypothesis/data/20200205_vcfs.list
# run MergeVCFs again
java -jar /nfs/software/birney/picard-2.9.0/picard.jar MergeVcfs \
I=racist_hypothesis/data/20200205_vcfs.list \
O=vcfs/1gk_all.vcf.gz
# WORKS
# Find out which reference is used by the callset
bcftools view vcfs/1gk_all.vcf.gz | less
# ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
# download that reference
cd refs
wget ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
# create index
/nfs/software/birney/samtools-1.9/samtools faidx refs/hs37d5.fa.gz
# create dictionary
java -jar /nfs/software/birney/picard-2.9.0/picard.jar CreateSequenceDictionary \
R=refs/hs37d5.fa.gz \
O=refs/hs37d5.dict
gatk SelectVariants \
-R refs/hs37d5.fa.gz \
-V vcfs/1gk_all.vcf.gz \
--keep-ids racist_hypothesis/data/20200204_snps.list \
-O vcfs/snp_hits.vcf.gz
# SUCCESS
cp vcfs/snp_hits.vcf.gz racist_hypothesis/data/20200303_snp_hits.vcf.gz
cp vcfs/snp_hits.vcf.gz.tbi racist_hypothesis/data/20200303_snp_hits.vcf.gz.tbi
snp_hits <- pegas::read.vcf(here::here("data", "20200303_snp_hits.vcf.gz"))
From here: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/working/20130606_sample_info/20130606_sample_info.xlsx (link embedded in this page: <www.internationalgenome.org/data>)
meta <- read_xlsx(here::here("data", "20130606_sample_info.xlsx"), sheet = "Sample Info") %>% dplyr::select(Sample, Population, Gender)
Expecting logical in N1378 / R1378C14: got 'HG02624'Expecting logical in N1387 / R1387C14: got 'HG02624, HG02610, HG02666'Expecting logical in N1417 / R1417C14: got 'HG02666, HG02624'Expecting logical in N1426 / R1426C14: got 'HG02682'Expecting logical in N1427 / R1427C14: got 'HG02681'Expecting logical in N1432 / R1432C14: got 'HG03705'Expecting logical in N1442 / R1442C14: got 'HG02700'Expecting logical in N1443 / R1443C14: got 'HG02699'Expecting logical in N1642 / R1642C14: got 'HG03097'Expecting logical in N1658 / R1658C14: got 'HG03073'Expecting logical in N1734 / R1734C14: got 'HG03229'Expecting logical in N1735 / R1735C14: got 'HG03228'Expecting logical in N1758 / R1758C14: got 'HG03271'Expecting logical in N1761 / R1761C14: got 'HG03268'Expecting logical in N1774 / R1774C14: got 'HG03372'Expecting logical in N1791 / R1791C14: got 'HG03352'Expecting logical in N1795 / R1795C14: got 'HG03366, HG03343'Expecting logical in N1801 / R1801C14: got 'HG03352'Expecting logical in N1807 / R1807C14: got 'HG03301'Expecting logical in N1812 / R1812C14: got 'HG03383'Expecting logical in N1815 / R1815C14: got 'HG03383'Expecting logical in N1832 / R1832C14: got 'HG03457'Expecting logical in L1838 / R1838C12: got 'HG03471'Expecting logical in N1848 / R1848C14: got 'HG03457'Expecting logical in N1849 / R1849C14: got 'HG03457'Expecting logical in N1853 / R1853C14: got 'HG03457'Expecting logical in N1857 / R1857C14: got 'HG03484'Expecting logical in N1862 / R1862C14: got 'HG03480, HG03484'Expecting logical in L1863 / R1863C12: got 'HG03438'Expecting logical in N1871 / R1871C14: got 'HG03469, HG03484'Expecting logical in N1872 / R1872C14: got 'HG03478, HG03469, HG03480'Expecting logical in N1899 / R1899C14: got 'HG03566'Expecting logical in N1902 / R1902C14: got 'HG03574'Expecting logical in N1909 / R1909C14: got 'HG03547'Expecting logical in N1914 / R1914C14: got 'HG03556'Expecting logical in N2007 / R2007C14: got 'HG02687'Expecting logical in N2365 / R2365C14: got 'NA07022, NA07056, first cous once removed'Expecting logical in N2372 / R2372C14: got 'NA06993 (1st cous once removed)'Expecting logical in N2381 / R2381C14: got 'NA06993 (1st cous once removed)'Expecting logical in N2457 / R2457C14: got 'NA12264 (1st cous once removed)'Expecting logical in N2463 / R2463C14: got 'NA12155 (1st cous once removed)'Expecting logical in H2561 / R2561C8: got 'NA18510'Expecting logical in H2562 / R2562C8: got 'NA18509'Expecting logical in H2711 / R2711C8: got 'NA19252'Expecting logical in L2716 / R2716C12: got 'NA19240'Expecting logical in N2818 / R2818C14: got 'NA19022, NA19045'Expecting logical in N2927 / R2927C14: got 'NA19178 (1st cous once removed)'Expecting logical in H2957 / R2957C8: got 'NA18908'Expecting logical in N2981 / R2981C14: got 'NA19334'Expecting logical in N2983 / R2983C14: got 'NA19331'Expecting logical in L3002 / R3002C12: got 'NA19382'Expecting logical in L3004 / R3004C12: got 'NA19380'Expecting logical in N3006 / R3006C14: got 'NA19025'Expecting logical in N3022 / R3022C14: got 'NA19037, NA19039'Expecting logical in H3205 / R3205C8: got 'NA19714'Expecting logical in L3215 / R3215C12: got 'NA20284,NA20285'Expecting logical in H3216 / R3216C8: got 'NA20284'Expecting logical in H3218 / R3218C8: got 'NA20281'Expecting logical in L3218 / R3218C12: got 'NA20279'Expecting logical in L3219 / R3219C12: got 'NA20279'Expecting logical in H3230 / R3230C8: got 'NA20300'Expecting logical in L3259 / R3259C12: got 'NA20363'Expecting logical in L3272 / R3272C12: got 'NA20347'Expecting logical in N3477 / R3477C14: got 'NA21134'Expecting logical in N3495 / R3495C14: got 'NA21114'
# create population vector
populations <- unlist(lapply(rownames(snp_hits), function(sample){
meta$Population[meta$Sample == sample]
}))
# add to snp_hits
snp_hits$population <- populations
# reorder
snp_hits <- snp_hits %>% dplyr::select(population, everything())
# split by population
snp_hits_split <- split(snp_hits, f = snp_hits$population)
# remove population columns
snp_hits_split <- lapply(snp_hits_split, function(x){
x$population <- NULL
return(x)
})
# get allele counts
allele_counts <- lapply(snp_hits_split, function(population){
summary(population)
})
snp_al_nos <- read.delim("~/Documents/Repositories/racist_hypothesis/data/20200303_top_snps_with_alleles.txt", as.is = T)
# TEST
test <- allele_counts[["ACB"]][["rs79582714"]]
test2 <- data.frame("allele" = names(test$allele),
"count" = test$allele)
test2$al_freq <- test2$count / sum(test2$count)
z_scores <- data.frame("allele" = c(snp_al_nos$Allele1[snp_al_nos$SNP == "rs79582714"],
snp_al_nos$Allele2[snp_al_nos$SNP == "rs79582714"]),
"z_score" = c(0, snp_al_nos$Z.Score[snp_al_nos$SNP == "rs79582714"]))
test3 <- dplyr::left_join(test2, z_scores, by = "allele")
# Create function
get_alfreq_table <- function(population, snp_effects_df){
counter <- 0
population <- lapply(population, function(snp_summary){
# set counter and extract SNP ID
counter <<- counter + 1
snp_id <- names(population)[counter]
# create final DF
final_df <- data.frame("allele" = names(snp_summary$allele),
"count" = snp_summary$allele,
stringsAsFactors = F)
# get allele frequency
final_df$al_freq <- final_df$count / sum(final_df$count)
# pull out z-scores and p-values from snp_effects_df
z_scores <- data.frame("allele" = c(snp_effects_df$Allele1[snp_effects_df$SNP == snp_id],
snp_effects_df$Allele2[snp_effects_df$SNP == snp_id]),
"z_score" = c(0, snp_effects_df$Z.Score[snp_effects_df$SNP == snp_id]),
"p_value" = c(0, snp_effects_df$P[snp_effects_df$SNP == snp_id]),
stringsAsFactors = F)
# join DFs. Note that it only joins the two alleles in the snp_effects_df
final_df <- dplyr::left_join(z_scores, final_df, by = "allele")
return(final_df)
})
return(population)
}
# Run over list
alcnt_df_lst <- lapply(allele_counts, function(x){
out <- get_alfreq_table(x, snp_al_nos)
final <- dplyr::bind_rows(out, .id = "rsid")
return(final)
})
# create final DF
alcnt_df <- dplyr::bind_rows(alcnt_df_lst, .id = "population")
# filter only for alleles with an efffect
alcnt_df_filt <- alcnt_df[alcnt_df$z_score != 0, ]
# set new rownames
rownames(alcnt_df_filt) <- seq(1:nrow(alcnt_df_filt))
ggplot(data = dplyr::filter(plot_df, z_score <= 0), aes(al_freq_YRI, al_freq_CEU, colour = z_score)) +
geom_point() +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-negative alleles")
ggplot(data = dplyr::filter(plot_df, z_score >= 0), aes(al_freq_YRI, al_freq_CEU, colour = z_score)) +
geom_point() +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-positive alleles")
ggplot(data = dplyr::filter(plot_df, z_score <= 0), aes(al_freq_YRI, al_freq_CHS, colour = z_score)) +
geom_point() +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-negative alleles")
ggplot(data = dplyr::filter(plot_df, z_score >= 0), aes(al_freq_YRI, al_freq_CHS, colour = z_score)) +
geom_point() +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-positive alleles")
ggplot(data = dplyr::filter(plot_df, z_score <= 0), aes(al_freq_CEU, al_freq_CHS, colour = z_score)) +
geom_point() +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-negative alleles")
ggplot(data = dplyr::filter(plot_df, z_score >= 0), aes(al_freq_CEU, al_freq_CHS, colour = z_score)) +
geom_point() +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-positive alleles")
test$SNP[test$Fst > 0.2]
[1] "rs1408579" "rs56151722" "rs7534501" "rs4294650" "rs1343700" "rs2420551" "rs11126396"
[8] "rs11168951" "rs28612284" "rs6124077" "rs6706409"
ggplot(data = dplyr::filter(plot_df, z_score <= 0), aes(al_freq_YRI, al_freq_CEU, colour = z_score, label = rsid)) +
geom_point() +
geom_label_repel(aes(label = ifelse(rsid %in% high_fst_snps, rsid, ''),point = 0.5)) +
#geom_text(aes(label = ifelse(rsid %in% high_fst_snps, rsid, '')), hjust = 0, vjust = 0, colour = "black") +
scale_color_viridis_c() +
coord_fixed() +
geom_smooth(se = F) +
geom_abline(intercept = 0, slope = 1) +
ggtitle("IQ-negative alleles")
Ignoring unknown aesthetics: point
# extract from excel doc
snps_eduyrs <- read_xlsx("~/Documents/Repositories/racist_hypothesis/data/20180723_Lee-et-al_supp-tables.xlsx", sheet = "2. EduYears Lead SNPs", skip = 1, n_max = 1271)
# write table of SNPs
write.table(snps_eduyrs[["SNP"]], "~/Documents/Repositories/racist_hypothesis/data/20200316_snps_eduyears.list", quote = F, row.names = F, col.names = F)
gatk SelectVariants \
-R refs/hs37d5.fa.gz \
-V vcfs/1gk_all.vcf.gz \
--keep-ids racist_hypothesis/data/20200316_snps_eduyears.list \
-O vcfs/snphits_eduyrs.vcf.gz
From the GWAS catalog here: https://www.ebi.ac.uk/gwas/efotraits/EFO_0004339.
Don’t include child trait data. Leaves 4907 associations.
Saved here: ~/Documents/Repositories/racist_hypothesis/data/20200304_raw_associations_height.csv
# pull out rsIDs
cat data/20200304_raw_associations_height.csv | cut -f1 -d'-' > tmp_rsid.txt
# pull out lines with risk alleles only
grep "<b>\w</b>" data/20200304_raw_associations_height.csv > data/20200316_raw_associations_height_edited.csv
# pull out rsIDs and paste into edited doc
cut -f1 -d'-' data/20200316_raw_associations_height_edited.csv
# note: split "Variant and risk allele" column into two manually
20200316
A bit messy. Try getting the height SNPs from this paper instead:
Yengo et al. (2018) Meta-analysis of genome-wide association studies for height and body mass index in approximately 700000 individuals of European ancestry
Data downloaded from here: https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files More specifically: https://portals.broadinstitute.org/collaboration/giant/images/e/e2/Meta-analysis_Locke_et_al%2BUKBiobank_2018_top_941_from_COJO_analysis_UPDATED.txt.gz
cd racist_hypothesis/data
# download
wget https://portals.broadinstitute.org/collaboration/giant/images/e/e2/Meta-analysis_Locke_et_al%2BUKBiobank_2018_top_941_from_COJO_analysis_UPDATED.txt.gz
# unzip
gunzip Meta-analysis_Locke_et_al%2BUKBiobank_2018_top_941_from_COJO_analysis_UPDATED.txt.gz
# rename
mv Meta-analysis_Wood_et_al+UKBiobank_2018_top_3290_from_COJO_analysis.txt 20181015_Yengo-et-al_snps_height.txt
cut -f1 20181015_Yengo-et-al_snps_height.txt | tail -n+2 > 20200318_snps_height.list
gatk SelectVariants \
-R refs/hs37d5.fa.gz \
-V vcfs/1gk_all.vcf.gz \
--keep-ids racist_hypothesis/data/20200318_snps_height.list \
-O vcfs/snphits_height.vcf.gz
20200318
Get SNPs for IBD.
Huang et al. (2017) Fine-mapping inflammatory bowel disease loci to single-variant resolution
Data downloaded here: https://www.nature.com/articles/nature22969#Sec29. More specifically, Supplementary Table 1: https://static-content.springer.com/esm/art%3A10.1038%2Fnature22969/MediaObjects/41586_2017_BFnature22969_MOESM2_ESM.xlsx Saved here: data/20170628_Huang-et-al_supp-table-1.xlsx
# extract from excel doc
snps_ibd <- read_xlsx(here::here("data", "20170628_Huang-et-al_supp-table-1.xlsx"),
sheet = "list of variants")
New names:
* trait -> trait...7
* trait -> trait...8
gatk SelectVariants \
-R refs/hs37d5.fa.gz \
-V vcfs/1gk_all.vcf.gz \
--keep-ids racist_hypothesis/data/20200319_snps_ibd_csl.list \
-O vcfs/snphits_ibd.vcf.gz
gatk SelectVariants \
-R refs/hs37d5.fa.gz \
-V vcfs/1gk_all.vcf.gz \
--keep-ids racist_hypothesis/data/20200319_snps_ibd.list \
-O vcfs/snphits_ibd_full.vcf.gz
cp vcfs/snphits* racist_hypothesis/data
20200320
From here: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/working/20130606_sample_info/20130606_sample_info.xlsx (link embedded in this page: <www.internationalgenome.org/data>)
meta <- read_xlsx(here::here("data", "20130606_sample_info.xlsx"), sheet = "Sample Info") %>% dplyr::select(Sample, Population, Gender)
# set names
names(vcf_list) <- gsub("snphits_|.vcf.gz", "", list.files(here::here("data"), pattern = glob2rx("snphits_*.gz")))
Error in names(vcf_list) <- gsub("snphits_|.vcf.gz", "", list.files(here::here("data"), :
'names' attribute [4] must be the same length as the vector [3]
NOTE:
• In the Lee et al. (edu_years) data tables, the sheet says: “Notes: Clumping of GWAS results was performed as described in the Supplementary Notes. SNPs are ordered by P-value. Chromosome (CHR) and base pair (BP) positions are reported for human genome build 37 (hg19). "Allele 1" is the allele whose estimated effect size (Beta) and allele frequency are reported. Standard errors (SE) and P-values are derived from test statistics that have been adjusted by an estimated LDSC intercept of 1.113. The analysis is based on 10,016,265 SNPs. The average chi-squared statistic is 2.530/2.816 (adjusted and unadjusted, respectively) and lambda GC is 2.038 (unadjusted).”
• In the Yengo et al. (height) data tables, the header is self-explanatory.
• In the Huang et al. (ibd) data tables, the SNPs have odds ratios rather than betas. Also, the legend says that A0 is the reference allele and A1 is the tested allele. p_multi is the -log10(P-value) for multi-variate model.
# create empty list
snps_list <- list()
# add edu_years SNPs
snps_list[["edu_years"]] <- read_xlsx(here::here("data", "20180723_Lee-et-al_supp-tables.xlsx"), sheet = "2. EduYears Lead SNPs", skip = 1, n_max = 1271) %>%
dplyr::select(snp = "SNP",
tested_allele = "Allele 1",
other_allele = "Allele2",
effect_size = "Effect size",
p_value = "P-value")
# add height SNPs
snps_list[["height"]] <- read_delim(here::here("data", "20181015_Yengo-et-al_snps_height.txt"), delim = "\t") %>%
dplyr::select(snp = "SNP",
tested_allele = "Tested_Allele",
other_allele = "Other_Allele",
effect_size = "BETA",
p_value = "P")
Parsed with column specification:
cols(
SNP = col_character(),
CHR = col_double(),
POS = col_double(),
Tested_Allele = col_character(),
Other_Allele = col_character(),
Freq_Tested_Allele_in_HRS = col_double(),
BETA = col_double(),
SE = col_double(),
P = col_double(),
N = col_double(),
BETA_COJO = col_double(),
SE_COJO = col_double(),
P_COJO = col_double()
)
# add ibd SNPs (full list)
snps_list[["ibd_full"]] <- read_xlsx(here::here("data", "20170628_Huang-et-al_supp-table-1.xlsx"), sheet = "list of variants") %>%
dplyr::mutate(mean_or = (OR_CD + OR_UC) / 2) %>%
dplyr::select(snp = "variant",
tested_allele = "A1",
other_allele = "A0",
effect_size = "mean_or",
p_value = "P_mean_95")
New names:
* trait -> trait...7
* trait -> trait...8
# add ibd SNPS - note the "effect size" here is the mean odds ratio across both CD and UC
snps_list[["ibd"]] <- read_xlsx(here::here("data", "20170628_Huang-et-al_supp-table-1.xlsx"), sheet = "list of credible sets") %>%
dplyr::mutate(mean_or = (OR_CD + OR_UC) / 2) %>%
dplyr::select(snp = "variant.lead",
tested_allele = "A1",
other_allele = "A0",
effect_size = "mean_or",
p_value = "p_multi")
# filter
snps_list[["ibd_full"]] <- dplyr::slice(snps_list[["ibd_full"]], (ibd_indels * -1))
Warning messages:
1: Unknown or uninitialised column: 'SNP'.
2: Unknown or uninitialised column: 'SNP'.
3: Unknown or uninitialised column: 'SNP'.
4: Unknown or uninitialised column: 'SNP'.
# Incorporate into function:
get_alfreq_table <- function(population, snp_df){
# take only the SNPs that are also in the snp_df
indexes_to_keep <- which(names(population) %in% snp_df$snp == T)
population <- population[indexes_to_keep]
# start loop
counter <- 0
population <- lapply(population, function(snp_summary){
# set counter and extract SNP ID
counter <<- counter + 1
snp_id <- names(population)[counter]
# create final DF
final_df <- data.frame("allele" = names(snp_summary$allele),
"count" = snp_summary$allele,
stringsAsFactors = F)
# get allele frequency
final_df$al_freq <- final_df$count / sum(final_df$count)
# pull out effect sizes and p-values from snp_df
effect_sizes <- data.frame("allele" = c(snp_df$other_allele[snp_df$snp == snp_id],
snp_df$tested_allele[snp_df$snp == snp_id]),
"effect_size" = c(0, snp_df$effect_size[snp_df$snp == snp_id]),
"p_value" = c(0, snp_df$p_value[snp_df$snp == snp_id]),
stringsAsFactors = F)
# join DFs. Note that it only joins the two alleles in the snp_df
final_df <- dplyr::left_join(effect_sizes, final_df, by = "allele")
# remove any rows with NA (caused by having a third allele)
final_df <- final_df[complete.cases(final_df), ]
# convert negative effects sizes into positive ones by flipping the allele
if(any(final_df$effect_size < 0)){
final_df$effect_size[final_df$effect_size == 0] <- final_df$effect_size[final_df$effect_size < 0] * (-1)
final_df$effect_size[final_df$effect_size < 0] <- 0
}
return(final_df)
})
return(population)
}
Warning message:
Unknown or uninitialised column: 'SNP'.
plot_df_lst <- lapply(alcnt_df_filt, function(pheno){
pheno <- pheno %>% dplyr::select(-count)
pheno <- tidyr::pivot_wider(data = pheno,
names_from = population,
names_prefix = "al_freq_",
values_from = al_freq)
return(pheno)
})
# Colour palettes
colour_pals <- c("viridis", "inferno", "plasma")
# Line colours
#line_cols <- c("red", "red", "blue")
# Titles
titles <- c("Educational Attainment", "Height", "Inflammatory Bowel Disease")
# Legend title for effect size
legend_title <- c("Effect size (Beta)", "Effect size (Beta)", "Mean odds ratio (CD and UC)")
counter <- 0
Warning messages:
1: Unknown or uninitialised column: 'SNP'.
2: Unknown or uninitialised column: 'SNP'.
3: Unknown or uninitialised column: 'SNP'.
4: Unknown or uninitialised column: 'SNP'.
lapply(plot_df_lst, function(pheno){
counter <<- counter + 1
ggplot(pheno, aes(al_freq_YRI, al_freq_CEU, colour = effect_size)) +
geom_point() +
scale_color_viridis_c(option = colour_pals[counter]) +
coord_fixed() +
geom_smooth(se = F, colour = line_cols[counter]) +
geom_abline(intercept = 0, slope = 1, colour = "blue") +
xlab("Allele frequency in YRI") +
ylab("Allele frequency in CEU") +
labs(title = titles[counter],
colour = legend_title[counter])
})
$eduyrs
$height
$ibd_full
counter <- 0
lapply(plot_df_lst, function(pheno){
counter <<- counter + 1
ggplot(pheno, aes(al_freq_YRI, al_freq_CHS, colour = effect_size)) +
geom_point() +
scale_color_viridis_c(option = colour_pals[counter]) +
coord_fixed() +
geom_smooth(se = F, colour = line_cols[counter]) +
geom_abline(intercept = 0, slope = 1, colour = "blue") +
xlab("Allele frequency in YRI") +
ylab("Allele frequency in CHS") +
labs(title = titles[counter],
colour = legend_title[counter])
})
$eduyrs
$height
$ibd_full
# Try without population column
vcf_list_raw <- lapply(target_vcfs, function(vcf_file){
vcf_out <- pegas::read.vcf(vcf_file)
})
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1270 / 1270 loci.
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1300 / 10000 loci
Reading 1400 / 10000 loci
Reading 1500 / 10000 loci
Reading 1600 / 10000 loci
Reading 1700 / 10000 loci
Reading 1800 / 10000 loci
Reading 1900 / 10000 loci
Reading 2000 / 10000 loci
Reading 2100 / 10000 loci
Reading 2200 / 10000 loci
Reading 2300 / 10000 loci
Reading 2400 / 10000 loci
Reading 2500 / 10000 loci
Reading 2600 / 10000 loci
Reading 2700 / 10000 loci
Reading 2800 / 10000 loci
Reading 2900 / 10000 loci
Reading 3000 / 10000 loci
Reading 3100 / 10000 loci
Reading 3200 / 10000 loci
Reading 3277 / 3277 loci.
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1300 / 10000 loci
Reading 1400 / 10000 loci
Reading 1500 / 10000 loci
Reading 1600 / 10000 loci
Reading 1700 / 10000 loci
Reading 1800 / 10000 loci
Reading 1900 / 10000 loci
Reading 2000 / 10000 loci
Reading 2100 / 10000 loci
Reading 2200 / 10000 loci
Reading 2300 / 10000 loci
Reading 2400 / 10000 loci
Reading 2500 / 10000 loci
Reading 2600 / 10000 loci
Reading 2700 / 10000 loci
Reading 2800 / 10000 loci
Reading 2900 / 10000 loci
Reading 3000 / 10000 loci
Reading 3100 / 10000 loci
Reading 3200 / 10000 loci
Reading 3300 / 10000 loci
Reading 3400 / 10000 loci
Reading 3500 / 10000 loci
Reading 3600 / 10000 loci
Reading 3700 / 10000 loci
Reading 3800 / 10000 loci
Reading 3900 / 10000 loci
Reading 4000 / 10000 loci
Reading 4039 / 4039 loci.
Done.
ggplot(fst_out_df, aes(Fst, fill = phenotype)) +
geom_density(alpha = 0.7) +
labs(fill = "Phenotype") +
ylab("Density") +
theme_bw()
Warning message:
Unknown or uninitialised column: 'SNP'.
20200330
From EB: “We need a stronger positive control - perhaps can you get skin pigmentation loci?”
• Crawford et al. (2017) Loci associated with skin pigmentation identified in African populations, Science: https://science.sciencemag.org/content/358/6365/eaan8433.abstract?casa_token=6tGEjct1nUQAAAAA:IL7LCz-xQ9l6rLhxBk5VcGBjwTrEa5UpAlC-nCl2mvcASu4iZbWMiu_Uj8YUHIISgCibOd1ya25sOQ. Take table 1, and manually transform ‘Ancestral>Divided’ column into new columns titled ‘tested_allele’ (the allele in bold) and ‘other_allele’. Save here: data/20171117_Crawford-et-al_Table-1.xlsx • Adhikari et al. (2019) A GWAS in Latin Americans highlights the convergent evolution of lighter skin pigmentation in Eurasia, Nature: https://www.nature.com/articles/s41467-018-08147-0. “Summary statistics from the GWAS analyses is deposited at GWAS central with the link http://www.gwascentral.org/study/HGVST3308”. Under the ‘Association results’ tab, there is one dataset for each of the 6 phenotypes tested:
- Melanin index - Hair color - Eye color - Digital eye color: L (lightness) - Digital eye color: C (chroma) - Digital eye color: cosH (cosine of hue) Difficult to ascertain which ones had genome-wide significance. Instead, pull tables directly from paper and supplementary materials, and put here in different sheets: data/20190121_Adhikari-et-al_snps.xlsx - Table 1: 18 lead SNPs from paper, each with a different p-value for one of the 6 phenotypes. - supp_table_6: 11 SNPs associated with combined traits. - supp_table_12: 161 SNPs collagted from published association studies on pigmentation. See table for references, which were used to identify other pigmentation GWAS studies. • Hernandez-Pacheco et al. (2017) Identification of a novel locus associated with skin colour in African-admixed populations, Scientific Reports: https://www.nature.com/articles/srep44548. 9 hits with genome-wide significance here: data/20170316_Hernandez-Pacheco-et-al.xlsx.
Others compiled into the single XLSX doc data/20200622_pigmentation_snps.xlsx: • ‘20190321_Jonnalagadda-et-al’: Jonnalagadda et al. (2019) A Genome-Wide Association Study of Skin and Iris Pigmentation among Individuals of South Asian Ancestry, Genome Biology and Evolution: https://academic.oup.com/gbe/article/11/4/1066/5416147. Took 9 SNPs associated with iris colour from Table 1, and 14 SNPs described in previous studies from Table 2. • ‘20171130_Martin-et-al’: Martin et al. (2017) An Unexpectedly Complex Architecture for Skin Pigmentation in Africans, Cell: https://www.cell.com/cell/fulltext/S0092-8674%2817%2931324-7. Took all 42 SNPs from Table S6A in Supplemental Information here: https://www.cell.com/cms/10.1016/j.cell.2017.11.015/attachment/eccead83-1a96-4444-9032-f968ee481d15/mmc2.xlsx. • ‘20150512_Liu-et-al’: Liu et al. (2015) Genetics of skin color variation in Europeans: genome-wide association studies with functional follow-up, Human Genetics: https://link.springer.com/article/10.1007%2Fs00439-015-1559-0. Took all 9 SNPs from Table 1. • ‘20121031_Candille-et-al’: Candille et al. (2012) Genome-wide association studies of quantitatively measured skin, hair, and eye pigmentation in four European populations, PLoS One: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0048294. Took all 8 SNPs from Table 2. • ‘20100614_Gerstenblith-et-al’: Gerstenblith et al. (2010) Genome-wide association studies of pigmentation and skin cancer: a review and meta-analysis, Pigment Cell Melanoma Research: https://onlinelibrary.wiley.com/doi/full/10.1111/j.1755-148X.2010.00730.x. Took all 39 SNPs from Table 4. • ‘20080516_Han-et-al’: Han et al. (2008) A Genome-Wide Association Study Identifies Novel Alleles Associated with Hair Color and Skin Pigmentation, PLOS Genetics: https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1000074. Took all 38 SNPs from Table 2.
pig_snps <- list()
# Crawford
pig_snps[["crawford"]] <- readxl::read_excel(here("data", "20171117_Crawford-et-al_Table-1.xlsx")) %>%
dplyr::select(rsid = "RSID", everything()) %>%
dplyr::filter(!is.na(rsid),
!rsid == ".")
# Adhikari
pig_snps[["adhikari_tbl_1"]] <- readxl::read_excel(here("data", "20190121_Adhikari-et-al_snps.xlsx"),
sheet = "Table 1",
skip = 1) %>%
dplyr::select(rsid = "rsID", everything()) %>%
dplyr::filter(!is.na(rsid))
pig_snps[["adhikari_supp_6"]] <- readxl::read_excel(here("data", "20190121_Adhikari-et-al_snps.xlsx"),
sheet = "supp_table_6") %>%
dplyr::select(rsid = "SNP", everything()) %>%
dplyr::filter(!is.na(rsid))
pig_snps[["adhikari_supp_12"]] <- readxl::read_excel(here("data", "20190121_Adhikari-et-al_snps.xlsx"),
sheet = "supp_table_12") %>%
dplyr::select(rsid = "SNP", everything()) %>%
dplyr::filter(!is.na(rsid))
# Hernandez-Pacheco
pig_snps[["hernandez-pacheco"]] <- readxl::read_excel(here("data", "20170316_Hernandez-Pacheco-et-al.xlsx"))
# Doc with SNPs from multiple studies
sheet_names <- readxl::excel_sheets(here("data", "20200622_pigmentation_snps.xlsx"))
compiled_snps <- lapply(sheet_names, function(x){
x <- readxl::read_excel(here("data", "20200622_pigmentation_snps.xlsx"),
sheet = x)
})
names(compiled_snps) <- sheet_names
# Combine
pig_snps <- c(pig_snps, compiled_snps)
# How many total SNPs
sum(unlist(lapply(pig_snps, nrow)))
[1] 414
pig_df <- lapply(pig_snps, function(x) dplyr::select(x, rsid))
pig_df <- dplyr::bind_rows(pig_df)
pig_df <- unique(pig_df)
nrow(pig_df)
[1] 266
write.table(x = pig_df$rsid,
file = here::here("data", "20200622_snps_pig.list"),
quote = F,
row.names = F,
col.names = F)
gatk SelectVariants \
-R refs/hs37d5.fa.gz \
-V vcfs/1gk_all.vcf.gz \
--keep-ids racist_hypothesis/data/20200622_snps_pig.list \
-O vcfs/snphits_pig.vcf.gz
cp vcfs/snphits_pig.vcf.gz* racist_hypothesis/data/
meta <- read_xlsx(here::here("data", "20130606_sample_info.xlsx"), sheet = "Sample Info") %>% dplyr::select(Sample, Population, Gender)
# read in VCFs and get allele counts
vcf_list <- lapply(target_vcfs, function(vcf_file){
# read in VCFs
vcf_out <- pegas::read.vcf(vcf_file)
# create population column
populations <- unlist(lapply(rownames(vcf_out), function(sample){
meta$Population[meta$Sample == sample]
}))
vcf_out$population <- populations
# reorder
vcf_out <- vcf_out %>% dplyr::select(population, everything())
# split by population
vcf_out_split <- split(vcf_out, f = vcf_out$population)
# remove population columns
vcf_out_split <- lapply(vcf_out_split, function(x){
x$population <- NULL
return(x)
})
# # get allele counts
# allele_counts <- lapply(vcf_out_split, function(population){
# summary(population)
# })
# return(allele_counts)
})
File apparently not yet accessed:
Scanning file /Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_eduyrs.vcf.gz
12.97124 Mb
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1270 / 1270 loci.
Done.
File apparently not yet accessed:
Scanning file /Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_height.vcf.gz
33.40984 Mb
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1300 / 10000 loci
Reading 1400 / 10000 loci
Reading 1500 / 10000 loci
Reading 1600 / 10000 loci
Reading 1700 / 10000 loci
Reading 1800 / 10000 loci
Reading 1900 / 10000 loci
Reading 2000 / 10000 loci
Reading 2100 / 10000 loci
Reading 2200 / 10000 loci
Reading 2300 / 10000 loci
Reading 2400 / 10000 loci
Reading 2500 / 10000 loci
Reading 2600 / 10000 loci
Reading 2700 / 10000 loci
Reading 2800 / 10000 loci
Reading 2900 / 10000 loci
Reading 3000 / 10000 loci
Reading 3100 / 10000 loci
Reading 3200 / 10000 loci
Reading 3277 / 3277 loci.
Done.
File apparently not yet accessed:
Scanning file /Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_pig_full.vcf.gz
41.17255 Mb
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1300 / 10000 loci
Reading 1400 / 10000 loci
Reading 1500 / 10000 loci
Reading 1600 / 10000 loci
Reading 1700 / 10000 loci
Reading 1800 / 10000 loci
Reading 1900 / 10000 loci
Reading 2000 / 10000 loci
Reading 2100 / 10000 loci
Reading 2200 / 10000 loci
Reading 2300 / 10000 loci
Reading 2400 / 10000 loci
Reading 2500 / 10000 loci
Reading 2600 / 10000 loci
Reading 2700 / 10000 loci
Reading 2800 / 10000 loci
Reading 2900 / 10000 loci
Reading 3000 / 10000 loci
Reading 3100 / 10000 loci
Reading 3200 / 10000 loci
Reading 3300 / 10000 loci
Reading 3400 / 10000 loci
Reading 3500 / 10000 loci
Reading 3600 / 10000 loci
Reading 3700 / 10000 loci
Reading 3800 / 10000 loci
Reading 3900 / 10000 loci
Reading 4000 / 10000 loci
Reading 4039 / 4039 loci.
Done.
vcfRtest <- vcfR::read.vcfR(target_vcfs[1])
Scanning file to determine attributes.
File attributes:
meta lines: 258
header_line: 259
variant count: 1270
column count: 2513
Meta line 258 read in.
All meta lines processed.
gt matrix initialized.
Character matrix gt created.
Character matrix gt rows: 1270
Character matrix gt cols: 2513
skip: 0
nrows: 1270
row_num: 0
Processed variant 1000
Processed variant: 1270
All variants processed
The following INFO key occurred more than once: AC
The following INFO key occurred more than once: AF
The following INFO key occurred more than once: DP
/nfs/software/birney/plink2 --vcf vcfs/snphits_pig.vcf.gz
mkdir racist_hypothesis/data/20200622_plink2_alfreqs
mkdir racist_hypothesis/data/20200622_plink2_alfreqs/hei
mkdir racist_hypothesis/data/20200622_plink2_alfreqs/edu
mkdir racist_hypothesis/data/20200622_plink2_alfreqs/pig
# Edu Years
/nfs/software/birney/plink2.3/plink2 \
--vcf vcfs/snphits_eduyrs.vcf.gz \
--freq \
--max-alleles 2 \
--pheno iid-only racist_hypothesis/data/plink2_sample_popn_key.txt \
--loop-cats PHENO1 \
--out racist_hypothesis/data/20200622_plink2_alfreqs/edu/edu
# Height
/nfs/software/birney/plink2.3/plink2 \
--vcf vcfs/snphits_height.vcf.gz \
--freq \
--max-alleles 2 \
--pheno iid-only racist_hypothesis/data/plink2_sample_popn_key.txt \
--loop-cats PHENO1 \
--out racist_hypothesis/data/20200622_plink2_alfreqs/hei/hei
# Pigmentation
/nfs/software/birney/plink2.3/plink2 \
--vcf vcfs/snphits_pig.vcf.gz \
--freq \
--max-alleles 2 \
--pheno iid-only racist_hypothesis/data/plink2_sample_popn_key.txt \
--loop-cats PHENO1 \
--out racist_hypothesis/data/20200622_plink2_alfreqs/pig/pig
git commitall "20200622 plink data"
target_dirs <- list.dirs(here::here("data", "20200622_plink2_alfreqs"), recursive = F)
al_freq_lst <- lapply(target_dirs, function(x){
target_files <- list.files(x, pattern = ".afreq", full.names = T)
# read in data
data_lst <- lapply(target_files, function(target_file){
read.table(target_file,
header = T,
comment.char = "")
})
# fix names of populations
names(data_lst) <- gsub(pattern = "edu.|hei.|pig.|.afreq",
replacement = "",
x = list.files(x, pattern = ".afreq"))
return(data_lst)
})
# set names
names(al_freq_lst) <- basename(target_dirs)
al_freq_df <- lapply(al_freq_df, function(pheno){
There were 11 warnings (use warnings() to see them)
dplyr::bind_rows(pheno, .id = "population")
})
Error in lapply(al_freq_df, function(pheno) { :
object 'al_freq_df' not found
counter <- 0
There were 12 warnings (use warnings() to see them)
lapply(al_freq_df, function(pheno){
counter <<- counter + 1
ggplot(pheno,
aes(YRI, CEU)) +
geom_point() +
# scale_color_viridis_c(option = colour_pals[counter]) +
coord_fixed() +
geom_smooth(se = F, colour = "red") +
geom_abline(intercept = 0, slope = 1, colour = "blue") +
xlab("Allele frequency in YRI") +
ylab("Allele frequency in CEU") +
labs(title = titles[counter])
})
$edu
$hei
$pig
counter <- 0
There were 15 warnings (use warnings() to see them)
lapply(al_freq_df, function(pheno){
counter <<- counter + 1
ggplot(pheno,
aes(YRI, CHS)) +
geom_point() +
# scale_color_viridis_c(option = colour_pals[counter]) +
coord_fixed() +
geom_smooth(se = F, colour = "red") +
geom_abline(intercept = 0, slope = 1, colour = "blue") +
xlab("Allele frequency in YRI") +
ylab("Allele frequency in CHS") +
labs(title = titles[counter])
})
$edu
$hei
$pig
set.seed(65)
rdm_sds <- sample(1:100, 3)
counter <- 0
al_freq_df_shuff <- lapply(al_freq_df, function(pheno){
counter <<- counter + 1
# set seed
set.seed(rdm_sds[counter])
# select SNPs to swap (half of total)
tgt_indcs <- sample(nrow(pheno), nrow(pheno) /2)
# swap minor alleles
pheno[tgt_indcs, 5:ncol(pheno)] <- 1 - pheno[tgt_indcs, 5:ncol(pheno)]
# return pheno
return(pheno)
})
counter <- 0
lapply(al_freq_df_shuff, function(pheno){
counter <<- counter + 1
ggplot(pheno,
aes(YRI, CHS)) +
geom_point() +
# scale_color_viridis_c(option = colour_pals[counter]) +
coord_fixed() +
geom_smooth(se = F, colour = "red") +
geom_abline(intercept = 0, slope = 1, colour = "blue") +
xlab("Allele frequency in YRI") +
ylab("Allele frequency in CHS") +
labs(title = titles[counter])
})
$edu
$hei
$pig
target_vcfs
[1] "/Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_eduyrs.vcf.gz"
[2] "/Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_height.vcf.gz"
[3] "/Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_pig.vcf.gz"
# Create raw list of variants
vcf_list_raw <- lapply(target_vcfs, function(vcf_file){
vcf_out <- pegas::read.vcf(vcf_file)
})
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1270 / 1270 loci.
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 300 / 10000 loci
Reading 400 / 10000 loci
Reading 500 / 10000 loci
Reading 600 / 10000 loci
Reading 700 / 10000 loci
Reading 800 / 10000 loci
Reading 900 / 10000 loci
Reading 1000 / 10000 loci
Reading 1100 / 10000 loci
Reading 1200 / 10000 loci
Reading 1300 / 10000 loci
Reading 1400 / 10000 loci
Reading 1500 / 10000 loci
Reading 1600 / 10000 loci
Reading 1700 / 10000 loci
Reading 1800 / 10000 loci
Reading 1900 / 10000 loci
Reading 2000 / 10000 loci
Reading 2100 / 10000 loci
Reading 2200 / 10000 loci
Reading 2300 / 10000 loci
Reading 2400 / 10000 loci
Reading 2500 / 10000 loci
Reading 2600 / 10000 loci
Reading 2700 / 10000 loci
Reading 2800 / 10000 loci
Reading 2900 / 10000 loci
Reading 3000 / 10000 loci
Reading 3100 / 10000 loci
Reading 3200 / 10000 loci
Reading 3277 / 3277 loci.
Done.
File apparently not yet accessed:
Scanning file /Users/brettell/Documents/Repositories/racist_hypothesis/data/snphits_pig.vcf.gz
2.696805 Mb
Done.
Reading 100 / 10000 loci
Reading 200 / 10000 loci
Reading 261 / 261 loci.
Done.
# Create vector of populations
populations <- unlist(lapply(rownames(vcf_list_raw[[1]]), function(sample){
meta$Population[meta$Sample == sample]
}))
# Generate Fst stats
fst_out_lst <- lapply(vcf_list_raw, function(pheno){
as.data.frame(pegas::Fst(pheno, pop = populations))
})
# make rownames into separate column
fst_out_lst <- lapply(fst_out_lst, function(pheno){
pheno$snp <- rownames(pheno)
return(pheno)
})
names(fst_out_lst) <- titles
# bind into single DF
fst_out_df <- dplyr::bind_rows(fst_out_lst, .id = "phenotype")
head(fst_out_df)
ggplot(fst_out_df, aes(Fst, fill = phenotype)) +
geom_density(alpha = 0.7) +
labs(fill = "Phenotype") +
ylab("Density") +
theme_bw() +
scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
# get samples from target popns only
target_popns <- which(populations %in% c("YRI", "CEU", "CHS"))
populations_3pop <- populations[target_popns]
vcf_list_raw_3pop <- lapply(vcf_list_raw, function(pheno){
pheno[target_popns, ]
})
# Generate Fst stats
fst_out_lst_3pop <- lapply(vcf_list_raw_3pop, function(pheno){
as.data.frame(pegas::Fst(pheno, pop = populations_3pop))
})
# make rownames into separate column
fst_out_lst_3pop <- lapply(fst_out_lst_3pop, function(pheno){
pheno$snp <- rownames(pheno)
return(pheno)
})
names(fst_out_lst_3pop) <- titles
# bind into single DF
fst_out_df_3pop <- dplyr::bind_rows(fst_out_lst_3pop, .id = "phenotype")
head(fst_out_df_3pop)
ggplot(fst_out_df_3pop, aes(Fst, fill = phenotype)) +
There were 16 warnings (use warnings() to see them)
geom_density(alpha = 0.7) +
labs(fill = "Phenotype") +
ylab("Density") +
theme_bw() +
scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
# factorise
fst_out_df_3pop$phenotype <- factor(fst_out_df_3pop$phenotype,
levels = c("Pigmentation", "Height", "Educational Attainment"))
ggplot() +
geom_density_ridges2(data = fst_out_df_3pop,
mapping = aes(x = Fst, y = phenotype, fill = phenotype),
scale = 0.5,
alpha = 0.8) +
scale_fill_manual(values = c("#FC4E07", "#00AFBB", "#E7B800")) +
ylab(label = NULL) +
theme_bw() #+
theme_ridges(center = T)
List of 93
$ line :List of 6
..$ colour : chr "black"
..$ size : num 0.636
..$ linetype : num 1
..$ lineend : chr "butt"
..$ arrow : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_line" "element"
$ rect :List of 5
..$ fill : chr "transparent"
..$ colour : logi NA
..$ size : num 0
..$ linetype : num 0
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_rect" "element"
$ text :List of 11
..$ family : chr ""
..$ face : chr "plain"
..$ colour : chr "black"
..$ size : num 14
..$ hjust : num 0.5
..$ vjust : num 0.5
..$ angle : num 0
..$ lineheight : num 0.9
..$ margin : 'margin' num [1:4] 0points 0points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ title : NULL
$ aspect.ratio : NULL
$ axis.title : NULL
$ axis.title.x :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0.5
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 6points 0points 3points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.title.x.top :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 0
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 3.5points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.title.x.bottom : NULL
$ axis.title.y :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0.5
..$ vjust : NULL
..$ angle : num 90
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 6points 0points 3points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.title.y.left : NULL
$ axis.title.y.right :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 0
..$ angle : num -90
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 0points 3.5points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : chr "black"
..$ size : num 12
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.x :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 1
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 3points 0points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.x.top :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 0
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 2.8points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.x.bottom : NULL
$ axis.text.y :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : num 0
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 3points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.y.left : NULL
$ axis.text.y.right :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 0points 2.8points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.ticks :List of 6
..$ colour : chr "grey90"
..$ size : num 0.5
..$ linetype : NULL
..$ lineend : NULL
..$ arrow : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_line" "element"
$ axis.ticks.x : NULL
$ axis.ticks.x.top : NULL
$ axis.ticks.x.bottom : NULL
$ axis.ticks.y :List of 6
..$ colour : chr "grey90"
..$ size : num 0.5
..$ linetype : NULL
..$ lineend : NULL
..$ arrow : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_line" "element"
$ axis.ticks.y.left : NULL
$ axis.ticks.y.right : NULL
$ axis.ticks.length : 'simpleUnit' num 3.5points
..- attr(*, "unit")= int 8
$ axis.ticks.length.x : NULL
$ axis.ticks.length.x.top : NULL
$ axis.ticks.length.x.bottom: NULL
$ axis.ticks.length.y : NULL
$ axis.ticks.length.y.left : NULL
$ axis.ticks.length.y.right : NULL
$ axis.line : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ axis.line.x : NULL
$ axis.line.x.top : NULL
$ axis.line.x.bottom : NULL
$ axis.line.y : NULL
$ axis.line.y.left : NULL
$ axis.line.y.right : NULL
$ legend.background :List of 5
..$ fill : NULL
..$ colour : logi NA
..$ size : NULL
..$ linetype : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_rect" "element"
$ legend.margin : 'margin' num [1:4] 7points 7points 7points 7points
..- attr(*, "unit")= int 8
$ legend.spacing : 'simpleUnit' num 14points
..- attr(*, "unit")= int 8
$ legend.spacing.x : NULL
$ legend.spacing.y : NULL
$ legend.key : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ legend.key.size : 'simpleUnit' num 1lines
..- attr(*, "unit")= int 3
$ legend.key.height : NULL
$ legend.key.width : NULL
$ legend.text :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : 'rel' num 0.857
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ legend.text.align : NULL
$ legend.title :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ legend.title.align : NULL
$ legend.position : chr "right"
$ legend.direction : NULL
$ legend.justification : chr [1:2] "left" "center"
$ legend.box : NULL
$ legend.box.just : NULL
$ legend.box.margin : 'margin' num [1:4] 0cm 0cm 0cm 0cm
..- attr(*, "unit")= int 1
$ legend.box.background : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ legend.box.spacing : 'simpleUnit' num 14points
..- attr(*, "unit")= int 8
$ panel.background : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ panel.border : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ panel.spacing : 'simpleUnit' num 7points
..- attr(*, "unit")= int 8
$ panel.spacing.x : NULL
$ panel.spacing.y : NULL
$ panel.grid :List of 6
..$ colour : chr "white"
..$ size : NULL
..$ linetype : NULL
..$ lineend : NULL
..$ arrow : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_line" "element"
$ panel.grid.major :List of 6
..$ colour : chr "grey90"
..$ size : num 0.5
..$ linetype : NULL
..$ lineend : NULL
..$ arrow : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_line" "element"
$ panel.grid.minor : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ panel.grid.major.x : NULL
$ panel.grid.major.y : NULL
$ panel.grid.minor.x : NULL
$ panel.grid.minor.y : NULL
$ panel.ontop : logi FALSE
$ plot.background : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ plot.title :List of 11
..$ family : NULL
..$ face : chr "bold"
..$ colour : NULL
..$ size : num 14
..$ hjust : num 0
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 7points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ plot.title.position : chr "panel"
$ plot.subtitle :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : 'rel' num 0.857
..$ hjust : num 0
..$ vjust : num 1
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 6points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ plot.caption :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : 'rel' num 0.857
..$ hjust : num 1
..$ vjust : num 1
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 6points 0points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ plot.caption.position : chr "panel"
$ plot.tag :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : 'rel' num 1.2
..$ hjust : num 0.5
..$ vjust : num 0.5
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ plot.tag.position : chr "topleft"
$ plot.margin : 'margin' num [1:4] 7points 14points 7points 7points
..- attr(*, "unit")= int 8
$ strip.background :List of 5
..$ fill : chr "grey80"
..$ colour : chr "grey50"
..$ size : num 0
..$ linetype : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_rect" "element"
$ strip.background.x : NULL
$ strip.background.y : NULL
$ strip.placement : chr "inside"
$ strip.text :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : 'rel' num 0.857
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ strip.text.x : NULL
$ strip.text.y :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : NULL
..$ angle : num -90
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ strip.switch.pad.grid : 'simpleUnit' num 3.5points
..- attr(*, "unit")= int 8
$ strip.switch.pad.wrap : 'simpleUnit' num 3.5points
..- attr(*, "unit")= int 8
$ strip.text.y.left :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : NULL
..$ angle : num 90
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi TRUE
- attr(*, "validate")= logi TRUE
# get lm for data
height_lm <- lm(CHS ~ 0 + YRI + CEU, data = al_freq_df$hei)
graph_reso <- 0.05
# set up axes
axis_x <- seq(min(al_freq_df$hei$YRI), max(al_freq_df$hei$YRI), by = graph_reso)
axis_y <- seq(min(al_freq_df$hei$CEU), max(al_freq_df$hei$CEU), by = graph_reso)
# sample points
height_lm_surface <- expand.grid(YRI = axis_x,
CEU = axis_y,
KEEP.OUT.ATTRS = F)
height_lm_surface$CHS <- predict.lm(height_lm, newdata = height_lm_surface)
height_lm_surface_cast <- reshape2::acast(height_lm_surface, CEU ~ YRI, value.var = "CHS")
With assistance from here: https://stackoverflow.com/questions/38331198/add-regression-plane-to-3d-scatter-plot-in-plotly.
# plot
height_plot <- plot_ly(al_freq_df$hei,
x = ~CEU,
y = ~YRI,
z = ~CHS,
type = "scatter3d",
mode = "markers",
marker = list(size = 2))
# add surface
height_plot <- add_trace(p = height_plot,
z = height_lm_surface,
x = axis_x,
y = axis_y,
type = "surface")
height_plot
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# get lm for data
height_loess <- loess(CHS ~ 0 + YRI + CEU, data = al_freq_df$hei)
graph_reso <- 0.05
# set up axes
axis_x <- seq(min(al_freq_df$hei$YRI), max(al_freq_df$hei$YRI), by = graph_reso)
axis_y <- seq(min(al_freq_df$hei$CEU), max(al_freq_df$hei$CEU), by = graph_reso)
# sample points
height_lm_surface <- expand.grid(YRI = axis_x,
CEU = axis_y,
KEEP.OUT.ATTRS = F)
height_lm_surface$CHS <- predict(height_loess, newdata = height_lm_surface)
height_lm_surface <- acast(height_lm_surface, CEU ~ YRI, value.var = "CHS")
# plot
height_plot <- plot_ly(al_freq_df$hei,
x = ~CEU,
y = ~YRI,
z = ~CHS,
type = "scatter3d",
mode = "markers",
marker = list(size = 2))
# add surface
height_plot <- add_trace(p = height_plot,
z = height_lm_surface,
x = axis_x,
y = axis_y,
type = "surface")
height_plot
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# get lm for data
height_loess <- loess(CHS ~ 0 + YRI + CEU, data = al_freq_df_shuff$hei)
graph_reso <- 0.05
# set up axes
axis_x <- seq(min(al_freq_df_shuff$hei$YRI), max(al_freq_df_shuff$hei$YRI), by = graph_reso)
axis_y <- seq(min(al_freq_df_shuff$hei$CEU), max(al_freq_df_shuff$hei$CEU), by = graph_reso)
# sample points
height_lm_surface <- expand.grid(YRI = axis_x,
CEU = axis_y,
KEEP.OUT.ATTRS = F)
height_lm_surface$CHS <- predict(height_loess, newdata = height_lm_surface)
height_lm_surface <- acast(height_lm_surface, CEU ~ YRI, value.var = "CHS")
# plot
height_plot <- plot_ly(al_freq_df_shuff$hei,
x = ~CEU,
y = ~YRI,
z = ~CHS,
type = "scatter3d",
mode = "markers",
marker = list(size = 2))
# add surface
height_plot <- add_trace(p = height_plot,
z = height_lm_surface,
x = axis_x,
y = axis_y,
type = "surface")
height_plot
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# get lm for data
height_loess <- loess(YRI ~ 0 + CEU + CHS, data = al_freq_df_shuff$hei)
graph_reso <- 0.05
# set up axes
axis_x <- seq(min(al_freq_df_shuff$hei$CEU), max(al_freq_df_shuff$hei$CEU), by = graph_reso)
axis_y <- seq(min(al_freq_df_shuff$hei$CHS), max(al_freq_df_shuff$hei$CHS), by = graph_reso)
# sample points
height_lm_surface <- expand.grid(CEU = axis_x,
CHS = axis_y,
KEEP.OUT.ATTRS = F)
height_lm_surface$YRI <- predict(height_loess, newdata = height_lm_surface)
height_lm_surface <- acast(height_lm_surface, CHS ~ CEU, value.var = "YRI")
# plot
height_plot <- plot_ly(al_freq_df_shuff$hei,
x = ~CEU,
y = ~CHS,
z = ~YRI,
type = "scatter3d",
mode = "markers",
marker = list(size = 2))
# add surface
height_plot <- add_trace(p = height_plot,
z = height_lm_surface,
x = axis_x,
y = axis_y,
type = "surface")
height_plot
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
lapply(al_freq_df_shuff, function(pheno){
# set graph resolution
graph_reso <- 0.05
# get lm for data
loess_model <- loess(YRI ~ 0 + CEU + CHS, data = pheno)
# set up axes
axis_x <- seq(min(pheno$CEU), max(pheno$CEU), by = graph_reso)
axis_y <- seq(min(pheno$CHS), max(pheno$CHS), by = graph_reso)
# sample points
lm_surface <- expand.grid(CEU = axis_x,
CHS = axis_y,
KEEP.OUT.ATTRS = F)
lm_surface$YRI <- predict(loess_model, newdata = lm_surface)
lm_surface <- acast(lm_surface, CHS ~ CEU, value.var = "YRI")
# create plot
plt <- plot_ly(pheno,
x = ~CEU,
y = ~CHS,
z = ~YRI,
type = "scatter3d",
mode = "markers",
marker = list(size = 2),
text = pheno$ID)
# add surface
plt <- add_trace(p = plt,
z = lm_surface,
x = axis_x,
y = axis_y,
type = "surface")
plt
})
$edu
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
$hei
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
$pig
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
'surface' objects don't have these attributes: 'mode', 'marker'
Valid attributes include:
'type', 'visible', 'legendgroup', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverlabel', 'stream', 'uirevision', 'z', 'x', 'y', 'text', 'hovertext', 'hovertemplate', 'connectgaps', 'surfacecolor', 'cauto', 'cmin', 'cmax', 'cmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'contours', 'hidesurface', 'lightposition', 'lighting', 'opacity', '_deprecated', 'hoverinfo', 'showlegend', 'xcalendar', 'ycalendar', 'zcalendar', 'scene', 'idssrc', 'customdatasrc', 'metasrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'surfacecolorsrc', 'hoverinfosrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
NA
colourscales <- c("Viridis", "Hot", "Electric")
titles <- c("Educational Attainment", "Height", "Skin/hair pigmentation")
counter <- 0
lapply(al_freq_df_shuff, function(pheno){
counter <<- counter + 1
# set graph resolution
graph_reso <- 0.05
# get lm for data
loess_model <- loess(CEU ~ 0 + CHS + YRI, data = pheno)
# set up axes
axis_x <- seq(min(pheno$CHS), max(pheno$CHS), by = graph_reso)
axis_y <- seq(min(pheno$YRI), max(pheno$YRI), by = graph_reso)
# sample points
lm_surface <- expand.grid(CHS = axis_x,
YRI = axis_y,
KEEP.OUT.ATTRS = F)
lm_surface$CEU <- predict(loess_model, newdata = lm_surface)
lm_surface <- acast(lm_surface, YRI ~ CHS, value.var = "CEU")
# create plot
plt <- plot_ly(pheno,
x = ~CHS,
y = ~YRI,
z = ~CEU,
type = "scatter3d",
mode = "markers",
marker = list(size = 2),
text = pheno$ID)
plt <- add_trace(plt,
z = lm_surface,
x = axis_x,
y = axis_y,
type = "surface",
colorscale = colourscales[counter]) %>%
layout(title = titles[counter])
plt
})
$edu
$hei
$pig
NA